* load mortality data from:
* 		UN World Population Prospects: The 2019 Revision 

cd "~/Documents/econ/research/current/cognitive/estimation/data"

// ---- child survival 0-5 yrs  ----
import excel using ///
	mortality2019/WPP2019_MORT_F01_2_Q5_BOTH_SEXES.xlsx, ///
	 cellrange(C18:U272) clear
rename (C E) (country countrynum)
drop if countrynum>=900 // drop all regional composites
kountry countrynum, from(iso3n) to(iso3c)
rename _ISO3C_ countrycode
drop if country=="Channel Islands" // no countrycode for Channel Islands
replace countrycode = "CUW" if country=="Curaçao"
replace countrycode = "RUS" if country=="Russian Federation"
replace countrycode = "SRB" if country=="Serbia"
replace countrycode = "SDN" if country=="Sudan"
drop countrynum D F G
rename (H I J K L M N O P Q R S T U) (cmr1950 cmr1955 cmr1960 cmr1965 cmr1970 ///
	cmr1975 cmr1980 cmr1985 cmr1990 cmr1995 cmr2000 cmr2005 cmr2010 cmr2015)
// reshape by year
reshape long cmr, i(country countrycode) j(year)
destring cmr, replace
gen csr = round(1000-cmr,.1)
label var csr "child survival rate (/1000)"
drop cmr
sort countrycode year
save survive, replace

// ---- adult survival 15-60 yrs ----
import excel using ///
	mortality2019/WPP2019_MORT_F11_1_Q1545_BOTH_SEXES.xlsx, ///
	 cellrange(C18:U272) clear
rename (C E) (country countrynum)
drop if countrynum>=900 // drop all regional composites
kountry countrynum, from(iso3n) to(iso3c)
rename _ISO3C_ countrycode
drop if country=="Channel Islands" // no countrycode for Channel Islands
replace countrycode = "CUW" if country=="Curaçao"
replace countrycode = "RUS" if country=="Russian Federation"
replace countrycode = "SRB" if country=="Serbia"
replace countrycode = "SDN" if country=="Sudan"
drop countrynum D F G
rename (H I J K L M N O P Q R S T U) (amr1950 amr1955 amr1960 amr1965 amr1970 ///
	amr1975 amr1980 amr1985 amr1990 amr1995 amr2000 amr2005 amr2010 amr2015)
// reshape by year
reshape long amr, i(country countrycode) j(year)
destring amr, replace
gen asr = round(1000-amr,.1)
label var asr "adult survival rate aged 15-60 (/1000)"
drop amr
merge 1:1 countrycode year using survive
drop _merge
sort countrycode year
save survive, replace

// ---- life expectancy ----
import excel using ///
	mortality2019/WPP2019_MORT_F07_1_LIFE_EXPECTANCY_0_BOTH_SEXES.xlsx, ///
	 cellrange(C18:U272) clear
rename (C E) (country countrynum)
drop if countrynum>=900 // drop all regional composites
kountry countrynum, from(iso3n) to(iso3c)
rename _ISO3C_ countrycode
drop if country=="Channel Islands" // no countrycode for Channel Islands
replace countrycode = "CUW" if country=="Curaçao"
replace countrycode = "RUS" if country=="Russian Federation"
replace countrycode = "SRB" if country=="Serbia"
replace countrycode = "SDN" if country=="Sudan"
drop countrynum D F G
rename (H-U) (e01950 e01955 e01960 e01965 e01970 ///
	e01975 e01980 e01985 e01990 e01995 e02000 e02005 e02010 e02015)
// reshape by year
reshape long e0, i(country countrycode) j(year)
destring e0, replace
label var e0 "Life expectancy at birth"
merge 1:1 countrycode year using survive
drop _merge
sort countrycode year
save survive, replace


// ---- lagged values ----
encode countrycode, gen(isonum)
xtset isonum year, delta(5)
local v "csr" 
by isonum: gen `v'_20 = (l4.`v'+l5.`v'+l6.`v'+l7.`v')/4
label var csr_20 "Child Survival Rate lagged 20-35 years"
drop isonum
order country countrycode year csr* asr e0
saveold survive, replace version(11)



// ---- old code using life table to calculate survival rates ----

/*
import excel using ///
	mortality/WPP2015_MORT_F17_1_ABRIDGED_LIFE_TABLE_BOTH_SEXES.xls, ///
	 cellrange(C18:R59544) clear
rename (C-R) (country notes countrynum years age age_int ///
	mx qx_str px_str lx dx Lx Sx_str Tx ex ax)
gen double qx = real(qx_str)
gen double px = real(px_str)
gen double Sx = real(Sx_str)
gen int year = real(substr(years,1,4))
gen int yearend = year+5
drop notes years

label var age 		"Age (x)"
label var age_int 	"Age interval (n)"	
label var mx 		"Central death rate m(x,n)"
label var qx 		"Probability of dying q(x,n)"
label var px 		"Probability of surviving p(x,n)"
label var lx 		"Number of survivors l(x)"
label var dx 		"Number of deaths d(x,n)"
label var Lx 		"Number of person-years lived L(x,n)"
label var Sx 		"Survival ratio S(x,n)"
label var Tx 		"Person-years lived T(x)"
label var ex 		"Expectation of life e(x)"
label var ax 		"Average number of years lived a(x,n)"
	
drop if countrynum>=900 // drop all regional composites
kountry countrynum, from(iso3n) to(iso3c)
rename _ISO3C_ countrycode
drop if country=="Channel Islands" // no countrycode for Channel Islands
replace countrycode = "CUW" if country=="Curaçao"
replace countrycode = "RUS" if country=="Russian Federation"
replace countrycode = "SRB" if country=="Serbia"
replace countrycode = "SDN" if country=="Sudan"

drop countrynum
order country countrycode year yearend age-ax
sort countrycode age year
saveold mortality, replace version(11)

// Survival Rates 0-5, 5-15, 15-60 60-90
//		= product of Sx in each age category
use mortality, clear
recode age (0/1=1) (5/10=2) (15/55=3) (60/70=4) (75/85=5), gen(agegrp)
gen double S = ln(Sx) // product is sum of logs
collapse (sum) S, by(countrycode year agegrp)
replace S = exp(S)
reshape wide S, i(countrycode year) j(agegrp)
rename (S1 S2 S3 S4 S5) (csr ysr asr osr vsr)
encode countrycode, gen(isonum)
xtset isonum year, delta(5)
bysort isonum: gen csr_15 = l3.csr
by isonum: gen csr_1535 = (l3.csr + l4.csr + l5.csr + l6.csr + l7.csr)/5
label var csr "Child Survival Rate (age 0-5)"
label var csr_15 "Child Survival Rate lagged 15 years"
label var csr_1535 "Child Survival Rate lagged 15-35 years"
label var ysr "Youth Survival Rate (age 5-15)"
label var asr "Adult Survival Rate (age 15-60)"
label var osr "Old Age Survival Rate (age 60-75)"
label var vsr "Very Old Age Survival Rate (age 75-90)"
merge 1:1 countrycode year using survive
drop _merge isonum
order country countrycode year csr*
saveold survive, replace version(11)

// ---- young women survival ----
import excel using ///
	mortality2019/WPP2019_MORT_F17_3_ABRIDGED_LIFE_TABLE_FEMALE.xlsx, ///
	 cellrange(C18:N77381) clear
rename (C-N) (country notes countrynum reg_type agg_code years age age_int ///
	mx_str qx_str px_str lx_str)
gen double lx = real(lx_str)
gen int year = real(substr(years,1,4))
gen int yearend = year+5
label var age 		"Age (x)"
label var lx 		"Number of survivors l(x)"
// only need lx15 and lx35 to calculate 20_p_15
keep if age==15 | age==35
keep country countrynum year age lx
reshape wide lx, i(country countrynum year) j(age)
gen ywsr = lx35/lx15
label var ywsr 		"Young women survival rate (age 15-35)"
drop if countrynum>=900 // drop all regional composites
kountry countrynum, from(iso3n) to(iso3c)
rename _ISO3C_ countrycode
drop if country=="Channel Islands" // no countrycode for Channel Islands
replace countrycode = "CUW" if country=="Curaçao"
replace countrycode = "RUS" if country=="Russian Federation"
replace countrycode = "SRB" if country=="Serbia"
replace countrycode = "SDN" if country=="Sudan"
keep countrycode year ywsr
merge 1:1 countrycode year using survive
drop _merge
sort countrycode year
save survive, replace

